{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import io\n",
    "import os\n",
    "import math\n",
    "import copy\n",
    "import pickle\n",
    "import zipfile\n",
    "from textwrap import wrap\n",
    "from pathlib import Path\n",
    "from itertools import zip_longest\n",
    "from collections import defaultdict\n",
    "from urllib.error import URLError\n",
    "from urllib.request import urlopen\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import torch.nn.functional as F\n",
    "from torch.optim.lr_scheduler import _LRScheduler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.style.use('ggplot')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def set_random_seed(state=1):\n",
    "    gens = (np.random.seed, torch.manual_seed, torch.cuda.manual_seed)\n",
    "    for set_state in gens:\n",
    "        set_state(state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "RANDOM_STATE = 1\n",
    "set_random_seed(RANDOM_STATE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def try_download(url, download_path):\n",
    "    archive_name = url.split('/')[-1]\n",
    "    folder_name, _ = os.path.splitext(archive_name)\n",
    "    \n",
    "    try:\n",
    "        r = urlopen(url)\n",
    "    except URLError as e:\n",
    "        print(\"Cannot download the data. Error: %s\" % s)\n",
    "        return\n",
    "    \n",
    "    assert r.status == 200\n",
    "    data = r.read()\n",
    "    \n",
    "    with zipfile.ZipFile(io.BytesIO(data)) as arch:\n",
    "        arch.extractall(download_path)\n",
    "        \n",
    "    print(\"The archive is extracted into folder: %s\" % download_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_data(path):\n",
    "    files = {}\n",
    "    for filename in path.glob('*'):\n",
    "        if filename.suffix == '.csv':\n",
    "            files[filename.stem] = pd.read_csv(filename)\n",
    "        elif filename.suffix == '.dat':\n",
    "            if filename.stem == 'ratings':\n",
    "                columns = ['userId', 'movieId', 'rating', 'timestamp']\n",
    "            else:\n",
    "                columns = ['movieId', 'title', 'genres']\n",
    "            data = pd.read_csv(filename, sep='::', names=columns, engine='python')\n",
    "            files[filename.stem] = data\n",
    "    return files['ratings'], files['movies']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "download_path = Path.cwd()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "ratings, movies = read_data(download_path / 'ml-latest-small')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "      <td>964982703</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>964981247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4.0</td>\n",
       "      <td>964982224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>47</td>\n",
       "      <td>5.0</td>\n",
       "      <td>964983815</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>50</td>\n",
       "      <td>5.0</td>\n",
       "      <td>964982931</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  movieId  rating  timestamp\n",
       "0       1        1     4.0  964982703\n",
       "1       1        3     4.0  964981247\n",
       "2       1        6     4.0  964982224\n",
       "3       1       47     5.0  964983815\n",
       "4       1       50     5.0  964982931"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ratings.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movieId</th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Toy Story (1995)</td>\n",
       "      <td>Adventure|Animation|Children|Comedy|Fantasy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Jumanji (1995)</td>\n",
       "      <td>Adventure|Children|Fantasy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Grumpier Old Men (1995)</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Waiting to Exhale (1995)</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Father of the Bride Part II (1995)</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movieId                               title  \\\n",
       "0        1                    Toy Story (1995)   \n",
       "1        2                      Jumanji (1995)   \n",
       "2        3             Grumpier Old Men (1995)   \n",
       "3        4            Waiting to Exhale (1995)   \n",
       "4        5  Father of the Bride Part II (1995)   \n",
       "\n",
       "                                        genres  \n",
       "0  Adventure|Animation|Children|Comedy|Fantasy  \n",
       "1                   Adventure|Children|Fantasy  \n",
       "2                               Comedy|Romance  \n",
       "3                         Comedy|Drama|Romance  \n",
       "4                                       Comedy  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "def tabular_preview(ratings, n=15):\n",
    "    user_groups = ratings.groupby('userId')['rating'].count()\n",
    "    top_users = user_groups.sort_values(ascending=False)[:15]\n",
    "    \n",
    "    movie_groups = ratings.groupby('movieId')['rating'].count()\n",
    "    top_movies = movie_groups.sort_values(ascending=False)[:15]\n",
    "    \n",
    "    top = (ratings.join(top_users, rsuffix='_r', how='inner', on='userId').join(top_movies, rsuffix='_r', how='inner', on='movieId'))\n",
    "    \n",
    "    return pd.crosstab(top.userId, top.movieId, top.rating, aggfunc=np.sum)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>movieId</th>\n",
       "      <th>1</th>\n",
       "      <th>50</th>\n",
       "      <th>110</th>\n",
       "      <th>260</th>\n",
       "      <th>296</th>\n",
       "      <th>318</th>\n",
       "      <th>356</th>\n",
       "      <th>480</th>\n",
       "      <th>527</th>\n",
       "      <th>589</th>\n",
       "      <th>593</th>\n",
       "      <th>1196</th>\n",
       "      <th>2571</th>\n",
       "      <th>2858</th>\n",
       "      <th>2959</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>userId</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>2.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>3.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>182</th>\n",
       "      <td>4.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>3.5</td>\n",
       "      <td>3.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>249</th>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>274</th>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>288</th>\n",
       "      <td>4.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>307</th>\n",
       "      <td>4.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>3.5</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>2.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>380</th>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>387</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.5</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>414</th>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>448</th>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>474</th>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>599</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>3.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>603</th>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>606</th>\n",
       "      <td>2.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>610</th>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "movieId  1     50    110   260   296   318   356   480   527   589   593   \\\n",
       "userId                                                                      \n",
       "68        2.5   3.0   2.5   5.0   2.0   3.0   3.5   3.5   4.0   3.5   3.5   \n",
       "182       4.0   4.5   3.5   3.5   5.0   4.5   5.0   3.5   4.0   2.0   4.5   \n",
       "249       4.0   4.0   5.0   5.0   4.0   4.5   4.5   4.0   4.5   4.0   4.0   \n",
       "274       4.0   4.0   4.5   3.0   5.0   4.5   4.5   3.5   4.0   4.5   4.0   \n",
       "288       4.5   NaN   5.0   5.0   5.0   5.0   5.0   2.0   5.0   4.0   5.0   \n",
       "307       4.0   4.5   3.5   3.5   4.5   4.5   4.0   3.5   4.5   2.5   4.5   \n",
       "380       5.0   4.0   4.0   5.0   5.0   3.0   5.0   5.0   NaN   5.0   5.0   \n",
       "387       NaN   4.5   3.5   4.5   5.0   3.5   4.0   3.0   NaN   3.5   4.0   \n",
       "414       4.0   5.0   5.0   5.0   5.0   5.0   5.0   4.0   4.0   5.0   4.0   \n",
       "448       5.0   4.0   NaN   5.0   5.0   NaN   3.0   3.0   NaN   3.0   5.0   \n",
       "474       4.0   4.0   3.0   4.0   4.0   5.0   3.0   4.5   5.0   4.0   4.5   \n",
       "599       3.0   3.5   3.5   5.0   5.0   4.0   3.5   4.0   NaN   4.5   3.0   \n",
       "603       4.0   NaN   1.0   4.0   5.0   NaN   3.0   NaN   3.0   NaN   5.0   \n",
       "606       2.5   4.5   3.5   4.5   5.0   3.5   4.0   2.5   5.0   3.5   4.5   \n",
       "610       5.0   4.0   4.5   5.0   5.0   3.0   3.0   5.0   3.5   5.0   4.5   \n",
       "\n",
       "movieId  1196  2571  2858  2959  \n",
       "userId                           \n",
       "68        5.0   4.5   5.0   2.5  \n",
       "182       3.0   5.0   5.0   5.0  \n",
       "249       5.0   5.0   4.5   5.0  \n",
       "274       4.5   4.0   5.0   5.0  \n",
       "288       4.5   3.0   NaN   3.5  \n",
       "307       3.0   3.5   4.0   4.0  \n",
       "380       5.0   4.5   NaN   4.0  \n",
       "387       4.5   4.0   4.5   4.5  \n",
       "414       5.0   5.0   5.0   5.0  \n",
       "448       5.0   2.0   4.0   4.0  \n",
       "474       5.0   4.5   3.5   4.0  \n",
       "599       5.0   5.0   5.0   5.0  \n",
       "603       3.0   5.0   5.0   4.0  \n",
       "606       4.5   5.0   4.5   5.0  \n",
       "610       5.0   5.0   3.5   5.0  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tabular_preview(ratings, movies)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "        userId  movieId  rating   timestamp\n",
      "0            1        1     4.0   964982703\n",
      "1            1        3     4.0   964981247\n",
      "2            1        6     4.0   964982224\n",
      "3            1       47     5.0   964983815\n",
      "4            1       50     5.0   964982931\n",
      "5            1       70     3.0   964982400\n",
      "6            1      101     5.0   964980868\n",
      "7            1      110     4.0   964982176\n",
      "8            1      151     5.0   964984041\n",
      "9            1      157     5.0   964984100\n",
      "10           1      163     5.0   964983650\n",
      "11           1      216     5.0   964981208\n",
      "12           1      223     3.0   964980985\n",
      "13           1      231     5.0   964981179\n",
      "14           1      235     4.0   964980908\n",
      "15           1      260     5.0   964981680\n",
      "16           1      296     3.0   964982967\n",
      "17           1      316     3.0   964982310\n",
      "18           1      333     5.0   964981179\n",
      "19           1      349     4.0   964982563\n",
      "20           1      356     4.0   964980962\n",
      "21           1      362     5.0   964982588\n",
      "22           1      367     4.0   964981710\n",
      "23           1      423     3.0   964982363\n",
      "24           1      441     4.0   964980868\n",
      "25           1      457     5.0   964981909\n",
      "26           1      480     4.0   964982346\n",
      "27           1      500     3.0   964981208\n",
      "28           1      527     5.0   964984002\n",
      "29           1      543     4.0   964981179\n",
      "...        ...      ...     ...         ...\n",
      "100806     610   150401     3.0  1479543210\n",
      "100807     610   152077     4.0  1493845817\n",
      "100808     610   152081     4.0  1493846503\n",
      "100809     610   152372     3.5  1493848841\n",
      "100810     610   155064     3.5  1493848456\n",
      "100811     610   156371     5.0  1479542831\n",
      "100812     610   156726     4.5  1493848444\n",
      "100813     610   157296     4.0  1493846563\n",
      "100814     610   158238     5.0  1479545219\n",
      "100815     610   158721     3.5  1479542491\n",
      "100816     610   158872     3.5  1493848024\n",
      "100817     610   158956     3.0  1493848947\n",
      "100818     610   159093     3.0  1493847704\n",
      "100819     610   160080     3.0  1493848031\n",
      "100820     610   160341     2.5  1479545749\n",
      "100821     610   160527     4.5  1479544998\n",
      "100822     610   160571     3.0  1493848537\n",
      "100823     610   160836     3.0  1493844794\n",
      "100824     610   161582     4.0  1493847759\n",
      "100825     610   161634     4.0  1493848362\n",
      "100826     610   162350     3.5  1493849971\n",
      "100827     610   163937     3.5  1493848789\n",
      "100828     610   163981     3.5  1493850155\n",
      "100829     610   164179     5.0  1493845631\n",
      "100830     610   166528     4.0  1493879365\n",
      "100831     610   166534     4.0  1493848402\n",
      "100832     610   168248     5.0  1493850091\n",
      "100833     610   168250     5.0  1494273047\n",
      "100834     610   168252     5.0  1493846352\n",
      "100835     610   170875     3.0  1493846415\n",
      "\n",
      "[100836 rows x 4 columns]\n",
      "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x1a24ee6d30>\n"
     ]
    },
    {
     "ename": "KeyError",
     "evalue": "'Column not found: ratings'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-35-5d959f26bd53>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mratings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mratings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'userId'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mratings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'userId'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'ratings'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mratings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'userId'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'ratings'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/lib/python3.7/site-packages/pandas/core/base.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    273\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    274\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 275\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Column not found: {key}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    276\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_gotitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mndim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    277\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: 'Column not found: ratings'"
     ]
    }
   ],
   "source": [
    "print(ratings)\n",
    "print(ratings.groupby('userId'))\n",
    "print(ratings.groupby('userId')['ratings'])\n",
    "print(ratings.groupby('userId')['ratings'].count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "userId\n",
      "1       232\n",
      "2        29\n",
      "3        39\n",
      "4       216\n",
      "5        44\n",
      "6       314\n",
      "7       152\n",
      "8        47\n",
      "9        46\n",
      "10      140\n",
      "11       64\n",
      "12       32\n",
      "13       31\n",
      "14       48\n",
      "15      135\n",
      "16       98\n",
      "17      105\n",
      "18      502\n",
      "19      703\n",
      "20      242\n",
      "21      443\n",
      "22      119\n",
      "23      121\n",
      "24      110\n",
      "25       26\n",
      "26       21\n",
      "27      135\n",
      "28      570\n",
      "29       81\n",
      "30       34\n",
      "       ... \n",
      "581      40\n",
      "582      56\n",
      "583      56\n",
      "584      83\n",
      "585      61\n",
      "586     208\n",
      "587     165\n",
      "588      56\n",
      "589      40\n",
      "590     728\n",
      "591      54\n",
      "592      94\n",
      "593     103\n",
      "594     232\n",
      "595      20\n",
      "596     411\n",
      "597     443\n",
      "598      21\n",
      "599    2478\n",
      "600     763\n",
      "601     101\n",
      "602     135\n",
      "603     943\n",
      "604     100\n",
      "605     221\n",
      "606    1115\n",
      "607     187\n",
      "608     831\n",
      "609      37\n",
      "610    1302\n",
      "Name: rating, Length: 610, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(ratings.groupby('userId')['rating'].count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
